# Figure b2
rm(list=ls())

(WD <- getwd())
if (!is.null(WD)) setwd(WD)

indata0      = paste0(WD,"/_individual_data/_otherrawdata/","", collapse = NULL)
indata1      = paste0(WD,"/_other_data/_public_signals/_hard_variables/","", collapse = NULL)
indata2      = paste0(WD,"/_other_data/_public_signals/_survey_variables/","", collapse = NULL)

library(haven)
library(AER)
library(sandwich)
library(lmtest)
library(pracma)
library(stargazer)
library(plm)
library(pracma)
library(DataCombine)
library(jtools)
library(plyr)
library(ggplot2)
library(tidyverse)
library(dotwhisker)

lagpad <- function(x, k) {
  res <- c(rep(NA, k), x)[1:length(x)]
  return(res)
}

n_hard    = 7
n_survey  = 3
cut       = 1000

load(paste(indata0,"ea_spf_cpi_ind.Rda",sep=""))
data$ID         = data$id
data            = data[data$qdate<=2020.00,]
data            = ddply(data,.(qdate),transform, cons = mean(f2,na.rm = TRUE))
data_fcast      = subset(data, select = c(qdate,fc_err, ID, p_realz))

data_tmp        = aggregate(. ~ qdate, data_fcast, mean)
data_tmp$lag    = lagpad(data_tmp$p_realz,5)
data_tmp        = subset(data_tmp, select = c(qdate, lag))

load(paste(indata1,"ea_public_signals_hard.Rda",sep=""))
data_hard       = data
data_hard$qdate = data_hard$qdate+5/4 
data_hard       = merge(data_hard, data_fcast, by = "qdate", all = TRUE)
data_hard       = merge(data_hard, data_tmp, by= "qdate", all = TRUE)

load(paste(indata2,"ea_public_signals_survey.Rda",sep=""))
data_survey       = data
data_survey$qdate = data_survey$qdate+5/4
data_survey       = merge(data_survey, data_fcast, by = "qdate", all = TRUE)

iter          = 0
out_beta_hard  = rep(NA, n_hard)
out_se_hard    = rep(NA, n_hard)
out_n_hard     = rep(NA, n_hard)
out_t_hard     = rep(NA, n_hard)
beta_save      = rep(NA,n_hard)

for(ii in colnames(data_hard)) {
  if (ii != "qdate"  && ii != "fc_err" && ii != "ID" && ii != "p_realz") { 
      iter = iter + 1
      print(ii)
    
      data_hard$xtmp = scale(data_hard[[ii]])
      sign_reg       = plm(p_realz ~ xtmp, data=data_hard,index=c("ID", "qdate"), model="within")
      coefs          = coefficients(sign_reg)
      beta           = as.numeric(coefs[1])
      beta_save[iter] = beta
      
      if (beta>0) {
        plm_hard     = plm(fc_err ~ xtmp, data=data_hard,index=c("ID", "qdate"), model="within")
        beta          = coefficients(plm_hard)
        
        data_tmp     = aggregate(x= data_hard, by =list(data_hard$qdate), FUN= "mean")
        data_tmp     = data_tmp[data_tmp$qdate<=2020.00,]
        tobs         = length(na.omit(data_tmp$xtmp))
        
        if (tobs > cut){
          std_error    = sqrt(diag(vcovDC(plm_hard, type = "HC1")))
        } else {
          std_error    = sqrt(diag(vcovHC(plm_hard, cluster = "group", type = "HC1")))  
        }
        
        out_beta_hard[iter]   = as.numeric(beta[1])
        out_se_hard[iter]     = as.numeric(std_error[1])
        out_n_hard[iter]      = nobs(plm_hard)
        out_t_hard[iter]      = tobs
        
      } else if (beta<0) {
        data_hard$xtmp = -data_hard$xtmp
        
        plm_hard     = plm(fc_err ~ xtmp, data=data_hard,index=c("ID", "qdate"), model="within")
        beta          = coefficients(plm_hard)
        
        data_tmp     = aggregate(x= data_hard, by =list(data_hard$qdate), FUN= "mean")
        data_tmp     = data_tmp[data_tmp$qdate<=2020.00,]
        tobs         = length(na.omit(data_tmp$xtmp))
        
        if (tobs > cut){
          std_error    = sqrt(diag(vcovDC(plm_hard, type = "HC1")))
        } else {
          std_error    = sqrt(diag(vcovHC(plm_hard, cluster = "group", type = "HC1")))  
        }
        
        out_beta_hard[iter]   = as.numeric(beta[1])
        out_se_hard[iter]     = as.numeric(std_error[1])
        out_n_hard[iter]      = nobs(plm_hard)
        out_t_hard[iter]      = tobs
      }
  }
}


all_hard =  data.frame(out_beta_hard, out_se_hard, out_n_hard)

clean_hard = all_hard 
term = c('NEER','IMPP','OIL','UNMP', 'STOX','TERM', 'LAG')
clean_hard$term = term
names(clean_hard)[1] <- "estimate"
names(clean_hard)[2] <- "std.error"
clean_hard = clean_hard[c(7,1,2,3,4,5,6),]

#pdf(file="_figures/figure_b1_rhs.pdf")
dwplot(clean_hard, dot_args = list(size = 2.5), whisker_args = list(size = 1.0))+
  theme_bw() + xlab("Standardized Coefficient") + ylab("") +
  geom_vline(xintercept = 0, colour = "grey60", linetype = 2) +
  ggtitle("") + xlim(-0.50, 0.50) +
  theme(plot.title = element_text(face="bold"), legend.position="none", text = element_text(size = 14, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
ggsave("_figures/figure_b2_rhs.pdf")
#dev.off()

#write.table(format(clean_hard,digits=3), file = "_tables/table_figure_3f.txt", sep = "\t")

iter          = 0
out_beta_survey  = rep(NA, n_survey)
out_se_survey    = rep(NA, n_survey)
out_n_survey     = rep(NA, n_survey)
out_t_survey     = rep(NA, n_survey)
beta_save        = rep(NA,n_survey)

for(ii in colnames(data_survey)) {
  if (ii != "qdate"  && ii != "fc_err" && ii != "ID" && ii != "p_realz" && ii != "EASPF") { 
    iter = iter + 1
    print(ii)
    data_survey$xtmp = scale(data_survey[[ii]])
    sign_reg       = plm(p_realz ~ xtmp, data=data_survey,index=c("ID", "qdate"), model="within")
    coefs          = coefficients(sign_reg)
    beta           = as.numeric(coefs[1])
    beta_save[iter] = beta
    
    if (beta>0) {
      plm_survey     = plm(fc_err ~ xtmp, data=data_survey,index=c("ID", "qdate"), model="within")
      beta           = coefficients(plm_survey)
      
      data_tmp     = aggregate(x= data_survey, by =list(data_survey$qdate), FUN= "mean")
      data_tmp     = data_tmp[data_tmp$qdate<=2020.00,]
      tobs         = length(na.omit(data_tmp$xtmp))
      
      if (tobs > cut){
        std_error    = sqrt(diag(vcovDC(plm_survey, type = "HC1")))
      } else {
        std_error    = sqrt(diag(vcovHC(plm_survey, cluster = "group", type = "HC1")))  
      }
      
      out_beta_survey[iter]   = as.numeric(beta[1])
      out_se_survey[iter]     = as.numeric(std_error[1])
      out_n_survey[iter]      = nobs(plm_survey)
      out_t_survey[iter]      = tobs
      
    } else if (beta<0) {
      data_survey$xtmp = -data_survey$xtmp
      
      plm_survey    = plm(fc_err ~ xtmp, data=data_survey,index=c("ID", "qdate"), model="within")
      beta          = coefficients(plm_survey)
     
      data_tmp     = aggregate(x= data_survey, by =list(data_survey$qdate), FUN= "mean")
      data_tmp     = data_tmp[data_tmp$qdate<=2020.00,]
      tobs         = length(na.omit(data_tmp$xtmp))
      
      if (tobs > cut){
        std_error    = sqrt(diag(vcovDC(plm_survey, type = "HC1")))
      } else {
        std_error    = sqrt(diag(vcovHC(plm_survey, cluster = "group", type = "HC1")))  
      }
      
      out_beta_survey[iter]   = as.numeric(beta[1])
      out_se_survey[iter]     = as.numeric(std_error[1])
      out_n_survey[iter]      = nobs(plm_survey)
      out_t_survey[iter]      = tobs
      
    }
  }
}

load(paste(indata0,"ea_spf_cpi_ind.Rda",sep=""))
data            = data[data$qdate<=2020.00,]
data            = ddply(data,.(qdate),transform, cons = mean(f2,na.rm = TRUE))
plm_all_cons    = plm(fc_err ~ cons, data=data,index=c("id", "qdate"), model="within")
beta            = coefficients(plm_all_cons)
std_error       = sqrt(diag(vcovHC(plm_all_cons, cluster = "group", type = "HC1")))
out_beta_survey[iter]   = as.numeric(beta[1])
out_se_survey[iter]     = as.numeric(std_error[1])
out_n_survey[iter]      = nobs(plm_survey) 

all_survey  =  data.frame(out_beta_survey, out_se_survey, out_n_survey)

clean_survey = all_survey 
term = c('FMEXP','CONS','EASPF')
clean_survey$term = term
names(clean_survey)[1] <- "estimate"
names(clean_survey)[2] <- "std.error"
clean_survey = clean_survey[c(3,2,1),]

#pdf(file="_figures/figure_b1_lhs.pdf")
dwplot(clean_survey, dot_args = list(size = 2.5), whisker_args = list(size = 1.0))+
  theme_bw() + xlab("Standardized Coefficient") + ylab("") +
  geom_vline(xintercept = 0, colour = "grey60", linetype = 2) +
  ggtitle("") + xlim(-1.00, 0.50) +
  theme(plot.title = element_text(face="bold"), legend.position="none", text = element_text(size = 14, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
ggsave("_figures/figure_b2_lhs.pdf")
#dev.off()

#write.table(format(clean_survey,digits=3), file = "_tables/table_figure_3e.txt", sep = "\t")
